# Sales-analysis script: builds a local SparkContext, then answers three
# questions over a pipe-delimited JSON order log (see requirements below).
from pyspark import SparkConf, SparkContext
import os
import json

# NOTE(review): `category` is never used in this file — this looks like an
# accidental IDE auto-import; confirm and remove if so.
from unicodedata import category

# Tell the PySpark workers which Python interpreter to launch (Windows path).
os.environ['PYSPARK_PYTHON'] = r"D://Python/Python3107/python.exe"

# Run Spark in local mode using all available CPU cores.
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

# TODO Requirement 1: rank cities by total sales revenue.
# Each text line packs several JSON order records separated by "|", so the
# lines are split apart first and every fragment is parsed into a dict.
orders_rdd = sc.textFile(r"D://itheima/Python/pyspark案例/orders.txt")
dict_rdd = orders_rdd.flatMap(lambda line: line.split("|")).map(json.loads)
# Build (city, amount) pairs, sum per city, then sort by total descending
# in a single partition so the ranking is globally ordered.
city_totals = dict_rdd.map(lambda order: (order["areaName"], int(order["money"])))
result1 = city_totals.reduceByKey(lambda left, right: left + right).sortBy(
    lambda pair: pair[1], ascending=False, numPartitions=1
)
print(result1.collect())

# TODO Requirement 2: which product categories are on sale across all cities.
# Project every order down to its category, then deduplicate.
result2 = dict_rdd.map(lambda order: order["category"]).distinct()
print(result2.collect())

# TODO Requirement 3: which product categories are on sale in Beijing.
# Keep only the Beijing orders, then collect their distinct categories.
beijing_orders = dict_rdd.filter(lambda order: order["areaName"] == "北京")
result3 = beijing_orders.map(lambda order: order["category"]).distinct()
print(result3.collect())
sc.stop()
